{"cells":[{"cell_type":"code","execution_count":null,"metadata":{"id":"D2H6YLCPOXvZ"},"outputs":[],"source":["!pip install transformers\n","!pip install datasets\n","!pip install numpy\n","!pip install pandas\n","!pip install accelerate\n","!pip install evaluate"]},{"cell_type":"markdown","source":["# New Section"],"metadata":{"id":"MGNLTxGsKNzp"}},{"cell_type":"code","execution_count":8,"metadata":{"id":"H-LANn-hUlZh","collapsed":true,"executionInfo":{"status":"ok","timestamp":1751457280965,"user_tz":-480,"elapsed":12,"user":{"displayName":"Wen Qu","userId":"17890693673485394938"}}},"outputs":[],"source":["import numpy as np\n","np.random.seed(11)\n","import torch\n","torch.manual_seed(11)\n","import random\n","random.seed(11)\n","\n","import transformers\n","import pandas as pd\n","import evaluate\n","\n","from torch.utils.data import Dataset\n","from datasets import Dataset, DatasetDict\n","from sklearn.metrics import classification_report, precision_recall_fscore_support\n","from sklearn.model_selection import train_test_split\n","from transformers import (\n","    AutoTokenizer,\n","    AutoModelForSequenceClassification,\n","    Trainer,\n","    TrainingArguments,\n","    RobertaForSequenceClassification,\n","    RobertaTokenizerFast,\n","    set_seed\n",")\n"]},{"cell_type":"code","source":["# ==============================================================================\n","# Define Parameters to Match the R Experiment\n","# ==============================================================================\n","# These values should exactly match one of the runs from R script.\n","\n","MODEL_NAME = \"distilroberta-base\"\n","NUM_LABELS = 15\n","OUTPUT_DIR = \"./python-results\"\n"],"metadata":{"id":"vxrprl3D6XKV","executionInfo":{"status":"ok","timestamp":1751456038160,"user_tz":-480,"elapsed":3,"user":{"displayName":"Wen Qu","userId":"17890693673485394938"}}},"execution_count":5,"outputs":[]},{"cell_type":"code","source":["\n","MODEL_NAME = \"distilroberta-base\"\n","NUM_LABELS = 15\n","N_RUNS = 3\n","\n","# ==============================================================================\n","# Define Helper Functions\n","# ==============================================================================\n","\n","# This function will be called by the Trainer during evaluation\n","def compute_metrics_for_trainer(pred):\n","    \"\"\"Calculates weighted metrics for the Trainer's evaluation step.\"\"\"\n","    labels = pred.label_ids\n","    preds = pred.predictions.argmax(-1)\n","    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted', zero_division=0)\n","    return {'f1': f1, 'precision': precision, 'recall': recall}\n","\n","# This function defines how to initialize a fresh model for each run\n","def model_init():\n","    \"\"\"Initializes a new model for each training run.\"\"\"\n","    set_seed(training_args.seed)\n","    return AutoModelForSequenceClassification.from_pretrained(\n","        MODEL_NAME,\n","        num_labels=NUM_LABELS\n","    )"],"metadata":{"id":"UJNU1VGh-mr2","executionInfo":{"status":"ok","timestamp":1751458045939,"user_tz":-480,"elapsed":15,"user":{"displayName":"Wen Qu","userId":"17890693673485394938"}}},"execution_count":19,"outputs":[]},{"cell_type":"code","source":["class CustomTextDataset(Dataset):\n","    def __init__(self, dataframe, tokenizer, max_length):\n","        self.tokenizer = tokenizer\n","        self.data = dataframe\n","        self.max_length = max_length\n","\n","    def __len__(self):\n","        return len(self.data)\n","\n","    def __getitem__(self, idx):\n","        text = self.data.iloc[idx]['text']\n","        label = self.data.iloc[idx]['label']\n","\n","        encoding = self.tokenizer(\n","            text,\n","            truncation=True,\n","            padding='max_length',\n","            max_length=self.max_length,\n","            return_tensors='pt'\n","        )\n","\n","        return {\n","            'input_ids': encoding['input_ids'].squeeze(),\n","            'attention_mask': encoding['attention_mask'].squeeze(),\n","            'labels': torch.tensor(label, dtype=torch.long)\n","        }"],"metadata":{"id":"0PeOTv__Yw09"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["all_precisions = []\n","all_recalls = []\n","all_f1s = []\n","\n","for i in range(1, N_RUNS + 1):\n","    run_name = f\"run_{i}\"\n","    run_seed = 11 * i\n","    print(f\"\\n===== STARTING RUN {i}/{N_RUNS} (Seed: {run_seed}) =====\\n\")\n","\n","    # --- Load Data for the Current Run ---\n","    try:\n","        train_df = pd.read_csv(f\"run_{i}_train.csv\")\n","        validation_df = pd.read_csv(f\"run_{i}_validation.csv\")\n","        test_df = pd.read_csv(f\"run_{i}_test.csv\")\n","    except FileNotFoundError as e:\n","        print(f\"Error: Could not find data files for {run_name}. Make sure they are uploaded.\")\n","        print(f\"Details: {e}\")\n","        continue\n","\n","    # --- Preprocess Data (Tokenization and Custom Dataset Creation) ---\n","    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)\n","    mlength = 512\n","\n","    train_dataset = CustomTextDataset(train_df, tokenizer, mlength)\n","    validation_dataset = CustomTextDataset(validation_df, tokenizer, mlength)\n","    test_dataset = CustomTextDataset(test_df, tokenizer, mlength)\n","\n","\n","    # --- Define Training Arguments for this Run ---\n","\n","    training_args = TrainingArguments(\n","        output_dir=f\"./results/{run_name}\",\n","        num_train_epochs=10,\n","        learning_rate=2e-5,\n","        per_device_train_batch_size=64,\n","        per_device_eval_batch_size=128,\n","        weight_decay=0.05,\n","        warmup_steps=0,\n","        eval_strategy=\"epoch\",\n","        logging_strategy=\"epoch\",\n","        save_strategy=\"epoch\",\n","        load_best_model_at_end=True,\n","        metric_for_best_model=\"precision\",\n","        seed=11,\n","    )\n","\n","    # --- Initialize Trainer ---\n","\n","    trainer = Trainer(\n","        model_init=model_init,\n","        args=training_args,\n","        train_dataset=train_dataset,\n","        eval_dataset=validation_dataset,\n","        compute_metrics=compute_metrics_for_trainer,\n","    )\n","\n","    # --- Run Training and Evaluation ---\n","    print(f\"--- Training {run_name} ---\")\n","    trainer.train()\n","\n","    print(f\"--- Evaluating {run_name} on Test Set ---\")\n","    prediction_output = trainer.predict(test_dataset)\n","\n","    # Use classification_report to get detailed metrics, just like the R package\n","    report = classification_report(\n","        prediction_output.label_ids,\n","        prediction_output.predictions.argmax(-1),\n","        output_dict=True,\n","        zero_division=0\n","    )\n","\n","    # Store the final weighted average metrics for this run\n","    weighted_avg_metrics = report['weighted avg']\n","    all_precisions.append(weighted_avg_metrics.get('precision'))\n","    all_recalls.append(weighted_avg_metrics.get('recall'))\n","    all_f1s.append(weighted_avg_metrics.get('f1-score'))\n","\n","    print(f\"--- Results for {run_name} ---\")\n","    print(f\"Precision: {weighted_avg_metrics.get('precision'):.4f}, Recall: {weighted_avg_metrics.get('recall'):.4f}, F1: {weighted_avg_metrics.get('f1-score'):.4f}\")\n","\n","print(\"\\n\\n===== FINAL AGGREGATED SUMMARY =====\\n\")\n","print(f\"Mean Precision: {np.mean(all_precisions):.4f}\")\n","print(f\"Mean Recall:    {np.mean(all_recalls):.4f}\")\n","print(f\"Mean F1-Score:  {np.mean(all_f1s):.4f}\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"ffrky5xH-o8e","executionInfo":{"status":"ok","timestamp":1751462055600,"user_tz":-480,"elapsed":4006461,"user":{"displayName":"Wen Qu","userId":"17890693673485394938"}},"outputId":"765a5066-4a59-48b3-c724-e71bfc40846f"},"execution_count":20,"outputs":[{"output_type":"stream","name":"stdout","text":["\n","===== STARTING RUN 1/3 (Seed: 11) =====\n","\n"]},{"output_type":"stream","name":"stderr","text":["Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']\n","You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"]},{"output_type":"stream","name":"stdout","text":["--- Training run_1 ---\n"]},{"output_type":"stream","name":"stderr","text":["Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']\n","You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"]},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":["\n","    <div>\n","      \n","      <progress value='980' max='980' style='width:300px; height:20px; vertical-align: middle;'></progress>\n","      [980/980 21:58, Epoch 10/10]\n","    </div>\n","    <table border=\"1\" class=\"dataframe\">\n","  <thead>\n"," <tr style=\"text-align: left;\">\n","      <th>Epoch</th>\n","      <th>Training Loss</th>\n","      <th>Validation Loss</th>\n","      <th>F1</th>\n","      <th>Precision</th>\n","      <th>Recall</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <td>1</td>\n","      <td>2.156700</td>\n","      <td>1.585891</td>\n","      <td>0.488072</td>\n","      <td>0.537700</td>\n","      <td>0.532526</td>\n","    </tr>\n","    <tr>\n","      <td>2</td>\n","      <td>1.417700</td>\n","      <td>1.243228</td>\n","      <td>0.607923</td>\n","      <td>0.602782</td>\n","      <td>0.629464</td>\n","    </tr>\n","    <tr>\n","      <td>3</td>\n","      <td>1.118600</td>\n","      <td>1.107818</td>\n","      <td>0.639141</td>\n","      <td>0.639382</td>\n","      <td>0.658163</td>\n","    </tr>\n","    <tr>\n","      <td>4</td>\n","      <td>0.936800</td>\n","      <td>1.074373</td>\n","      <td>0.658415</td>\n","      <td>0.660247</td>\n","      <td>0.673469</td>\n","    </tr>\n","    <tr>\n","      <td>5</td>\n","      <td>0.821000</td>\n","      <td>1.046523</td>\n","      <td>0.665434</td>\n","      <td>0.670122</td>\n","      <td>0.677296</td>\n","    </tr>\n","    <tr>\n","      <td>6</td>\n","      <td>0.730700</td>\n","      <td>1.037865</td>\n","      <td>0.679811</td>\n","      <td>0.683526</td>\n","      <td>0.688776</td>\n","    </tr>\n","    <tr>\n","      <td>7</td>\n","      <td>0.656900</td>\n","      <td>1.034257</td>\n","      <td>0.681762</td>\n","      <td>0.689748</td>\n","      <td>0.689413</td>\n","    </tr>\n","    <tr>\n","      <td>8</td>\n","      <td>0.596400</td>\n","      <td>1.034579</td>\n","      <td>0.683618</td>\n","      <td>0.693244</td>\n","      <td>0.690051</td>\n","    </tr>\n","    <tr>\n","      <td>9</td>\n","      <td>0.552800</td>\n","      <td>1.039659</td>\n","      <td>0.682637</td>\n","      <td>0.692502</td>\n","      <td>0.691327</td>\n","    </tr>\n","    <tr>\n","      <td>10</td>\n","      <td>0.530300</td>\n","      <td>1.037732</td>\n","      <td>0.687187</td>\n","      <td>0.694349</td>\n","      <td>0.693878</td>\n","    </tr>\n","  </tbody>\n","</table><p>"]},"metadata":{}},{"output_type":"stream","name":"stdout","text":["--- Evaluating run_1 on Test Set ---\n"]},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":[]},"metadata":{}},{"output_type":"stream","name":"stdout","text":["--- Results for run_1 ---\n","Precision: 0.6690, Recall: 0.6789, F1: 0.6711\n","\n","===== STARTING RUN 2/3 (Seed: 22) =====\n","\n"]},{"output_type":"stream","name":"stderr","text":["Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']\n","You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"]},{"output_type":"stream","name":"stdout","text":["--- Training run_2 ---\n"]},{"output_type":"stream","name":"stderr","text":["Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']\n","You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"]},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":["\n","    <div>\n","      \n","      <progress value='980' max='980' style='width:300px; height:20px; vertical-align: middle;'></progress>\n","      [980/980 21:58, Epoch 10/10]\n","    </div>\n","    <table border=\"1\" class=\"dataframe\">\n","  <thead>\n"," <tr style=\"text-align: left;\">\n","      <th>Epoch</th>\n","      <th>Training Loss</th>\n","      <th>Validation Loss</th>\n","      <th>F1</th>\n","      <th>Precision</th>\n","      <th>Recall</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <td>1</td>\n","      <td>2.177400</td>\n","      <td>1.628503</td>\n","      <td>0.440988</td>\n","      <td>0.467369</td>\n","      <td>0.496173</td>\n","    </tr>\n","    <tr>\n","      <td>2</td>\n","      <td>1.449000</td>\n","      <td>1.240109</td>\n","      <td>0.589405</td>\n","      <td>0.595713</td>\n","      <td>0.616709</td>\n","    </tr>\n","    <tr>\n","      <td>3</td>\n","      <td>1.145000</td>\n","      <td>1.130384</td>\n","      <td>0.619336</td>\n","      <td>0.628385</td>\n","      <td>0.641582</td>\n","    </tr>\n","    <tr>\n","      <td>4</td>\n","      <td>0.960800</td>\n","      <td>1.074195</td>\n","      <td>0.645364</td>\n","      <td>0.665006</td>\n","      <td>0.656250</td>\n","    </tr>\n","    <tr>\n","      <td>5</td>\n","      <td>0.845200</td>\n","      <td>1.054526</td>\n","      <td>0.650495</td>\n","      <td>0.661307</td>\n","      <td>0.665179</td>\n","    </tr>\n","    <tr>\n","      <td>6</td>\n","      <td>0.743000</td>\n","      <td>1.039300</td>\n","      <td>0.668551</td>\n","      <td>0.670756</td>\n","      <td>0.677296</td>\n","    </tr>\n","    <tr>\n","      <td>7</td>\n","      <td>0.670500</td>\n","      <td>1.039228</td>\n","      <td>0.669549</td>\n","      <td>0.670801</td>\n","      <td>0.677934</td>\n","    </tr>\n","    <tr>\n","      <td>8</td>\n","      <td>0.616300</td>\n","      <td>1.037431</td>\n","      <td>0.670065</td>\n","      <td>0.674040</td>\n","      <td>0.677934</td>\n","    </tr>\n","    <tr>\n","      <td>9</td>\n","      <td>0.578200</td>\n","      <td>1.040355</td>\n","      <td>0.671935</td>\n","      <td>0.681152</td>\n","      <td>0.678571</td>\n","    </tr>\n","    <tr>\n","      <td>10</td>\n","      <td>0.556600</td>\n","      <td>1.039895</td>\n","      <td>0.667226</td>\n","      <td>0.675713</td>\n","      <td>0.674745</td>\n","    </tr>\n","  </tbody>\n","</table><p>"]},"metadata":{}},{"output_type":"stream","name":"stdout","text":["--- Evaluating run_2 on Test Set ---\n"]},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":[]},"metadata":{}},{"output_type":"stream","name":"stdout","text":["--- Results for run_2 ---\n","Precision: 0.6941, Recall: 0.7034, F1: 0.6958\n","\n","===== STARTING RUN 3/3 (Seed: 33) =====\n","\n"]},{"output_type":"stream","name":"stderr","text":["Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']\n","You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"]},{"output_type":"stream","name":"stdout","text":["--- Training run_3 ---\n"]},{"output_type":"stream","name":"stderr","text":["Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']\n","You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"]},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":["\n","    <div>\n","      \n","      <progress value='980' max='980' style='width:300px; height:20px; vertical-align: middle;'></progress>\n","      [980/980 21:58, Epoch 10/10]\n","    </div>\n","    <table border=\"1\" class=\"dataframe\">\n","  <thead>\n"," <tr style=\"text-align: left;\">\n","      <th>Epoch</th>\n","      <th>Training Loss</th>\n","      <th>Validation Loss</th>\n","      <th>F1</th>\n","      <th>Precision</th>\n","      <th>Recall</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <td>1</td>\n","      <td>2.181200</td>\n","      <td>1.616700</td>\n","      <td>0.492392</td>\n","      <td>0.492148</td>\n","      <td>0.528061</td>\n","    </tr>\n","    <tr>\n","      <td>2</td>\n","      <td>1.427500</td>\n","      <td>1.271149</td>\n","      <td>0.591091</td>\n","      <td>0.584620</td>\n","      <td>0.612245</td>\n","    </tr>\n","    <tr>\n","      <td>3</td>\n","      <td>1.117400</td>\n","      <td>1.167376</td>\n","      <td>0.624229</td>\n","      <td>0.614235</td>\n","      <td>0.640944</td>\n","    </tr>\n","    <tr>\n","      <td>4</td>\n","      <td>0.939700</td>\n","      <td>1.118526</td>\n","      <td>0.648662</td>\n","      <td>0.654040</td>\n","      <td>0.661352</td>\n","    </tr>\n","    <tr>\n","      <td>5</td>\n","      <td>0.792200</td>\n","      <td>1.117678</td>\n","      <td>0.653469</td>\n","      <td>0.656945</td>\n","      <td>0.661352</td>\n","    </tr>\n","    <tr>\n","      <td>6</td>\n","      <td>0.714300</td>\n","      <td>1.124612</td>\n","      <td>0.652316</td>\n","      <td>0.654627</td>\n","      <td>0.658801</td>\n","    </tr>\n","    <tr>\n","      <td>7</td>\n","      <td>0.637500</td>\n","      <td>1.115566</td>\n","      <td>0.666985</td>\n","      <td>0.666505</td>\n","      <td>0.673469</td>\n","    </tr>\n","    <tr>\n","      <td>8</td>\n","      <td>0.576700</td>\n","      <td>1.124133</td>\n","      <td>0.667819</td>\n","      <td>0.668771</td>\n","      <td>0.672194</td>\n","    </tr>\n","    <tr>\n","      <td>9</td>\n","      <td>0.531100</td>\n","      <td>1.127345</td>\n","      <td>0.666342</td>\n","      <td>0.665927</td>\n","      <td>0.671556</td>\n","    </tr>\n","    <tr>\n","      <td>10</td>\n","      <td>0.505900</td>\n","      <td>1.124076</td>\n","      <td>0.672602</td>\n","      <td>0.671389</td>\n","      <td>0.677934</td>\n","    </tr>\n","  </tbody>\n","</table><p>"]},"metadata":{}},{"output_type":"stream","name":"stdout","text":["--- Evaluating run_3 on Test Set ---\n"]},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":[]},"metadata":{}},{"output_type":"stream","name":"stdout","text":["--- Results for run_3 ---\n","Precision: 0.6990, Recall: 0.7014, F1: 0.6962\n","\n","\n","===== FINAL AGGREGATED SUMMARY =====\n","\n","Mean Precision: 0.6873\n","Mean Recall:    0.6946\n","Mean F1-Score:  0.6877\n"]}]}],"metadata":{"accelerator":"GPU","colab":{"gpuType":"L4","machine_shape":"hm","provenance":[]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.8.5"}},"nbformat":4,"nbformat_minor":0}